library(dplyr)
library(lubridate)
library(ggplot2)
library(plotly)
library(scales)
library(glue)
#membaca data
sales <- read.csv("Adidas US Sales Datasets.csv")
head(sales)
#> Retailer Retailer.ID Invoice.Date Region State City
#> 1 Foot Locker 1185732 01-01-20 Northeast New York New York
#> 2 Foot Locker 1185732 02-01-20 Northeast New York New York
#> 3 Foot Locker 1185732 03-01-20 Northeast New York New York
#> 4 Foot Locker 1185732 04-01-20 Northeast New York New York
#> 5 Foot Locker 1185732 05-01-20 Northeast New York New York
#> 6 Foot Locker 1185732 06-01-20 Northeast New York New York
#> Product Price.per.Unit Units.Sold Total.Sales
#> 1 Men's Street Footwear 50 1200 600000
#> 2 Men's Athletic Footwear 50 1000 500000
#> 3 Women's Street Footwear 40 1000 400000
#> 4 Women's Athletic Footwear 45 850 382500
#> 5 Men's Apparel 60 900 540000
#> 6 Women's Apparel 50 1000 500000
#> Operating.Profit Operating.Margin Sales.Method
#> 1 300000 50 In-store
#> 2 150000 30 In-store
#> 3 140000 35 In-store
#> 4 133875 35 In-store
#> 5 162000 30 In-store
#> 6 125000 25 In-store
#mengecek struktur data
sales %>%
glimpse()
#> Rows: 9,648
#> Columns: 13
#> $ Retailer <chr> "Foot Locker", "Foot Locker", "Foot Locker", "Foot Lo…
#> $ Retailer.ID <int> 1185732, 1185732, 1185732, 1185732, 1185732, 1185732,…
#> $ Invoice.Date <chr> "01-01-20", "02-01-20", "03-01-20", "04-01-20", "05-0…
#> $ Region <chr> "Northeast", "Northeast", "Northeast", "Northeast", "…
#> $ State <chr> "New York", "New York", "New York", "New York", "New …
#> $ City <chr> "New York", "New York", "New York", "New York", "New …
#> $ Product <chr> "Men's Street Footwear", "Men's Athletic Footwear", "…
#> $ Price.per.Unit <int> 50, 50, 40, 45, 60, 50, 50, 50, 40, 45, 60, 50, 50, 5…
#> $ Units.Sold <int> 1200, 1000, 1000, 850, 900, 1000, 1250, 900, 950, 825…
#> $ Total.Sales <int> 600000, 500000, 400000, 382500, 540000, 500000, 62500…
#> $ Operating.Profit <int> 300000, 150000, 140000, 133875, 162000, 125000, 31250…
#> $ Operating.Margin <int> 50, 30, 35, 35, 30, 25, 50, 30, 35, 35, 30, 25, 50, 3…
#> $ Sales.Method <chr> "In-store", "In-store", "In-store", "In-store", "In-s…
data yang perlu diubah strukturnya: as.factor: Retailer, Retailer.ID, Region, State, City, Product, Sales.Method as.datetime: Invoice.Date
#merubah struktur data
sales_clean <-
sales %>%
# fungsi untuk mengubah beberapa kolom secara bersamaan ke tipe data yang sama
mutate_at(.vars = c("Retailer", "Retailer.ID", "Region", "State", "City", "Product", "Sales.Method"), # parameter untuk memilih kolom yang akan diubah tipe datanya
.funs = as.factor) %>% # parameter untuk fungsi tipe data yang dituju
mutate(Invoice.Date = dmy(Invoice.Date))
glimpse(sales_clean)
#> Rows: 9,648
#> Columns: 13
#> $ Retailer <fct> Foot Locker, Foot Locker, Foot Locker, Foot Locker, F…
#> $ Retailer.ID <fct> 1185732, 1185732, 1185732, 1185732, 1185732, 1185732,…
#> $ Invoice.Date <date> 2020-01-01, 2020-01-02, 2020-01-03, 2020-01-04, 2020…
#> $ Region <fct> Northeast, Northeast, Northeast, Northeast, Northeast…
#> $ State <fct> New York, New York, New York, New York, New York, New…
#> $ City <fct> New York, New York, New York, New York, New York, New…
#> $ Product <fct> Men's Street Footwear, Men's Athletic Footwear, Women…
#> $ Price.per.Unit <int> 50, 50, 40, 45, 60, 50, 50, 50, 40, 45, 60, 50, 50, 5…
#> $ Units.Sold <int> 1200, 1000, 1000, 850, 900, 1000, 1250, 900, 950, 825…
#> $ Total.Sales <int> 600000, 500000, 400000, 382500, 540000, 500000, 62500…
#> $ Operating.Profit <int> 300000, 150000, 140000, 133875, 162000, 125000, 31250…
#> $ Operating.Margin <int> 50, 30, 35, 35, 30, 25, 50, 30, 35, 35, 30, 25, 50, 3…
#> $ Sales.Method <fct> In-store, In-store, In-store, In-store, In-store, In-…
#Check missing value
anyNA(sales)
#> [1] FALSE
#Plot1 plot title: Total item sold based on date in any region plot type: line plot
#jumlah sales pada setiap retailer di masing2 region dengan cara base
sales_regbase <- aggregate(Total.Sales ~ Retailer + Region,
data = sales_clean,
FUN = sum)
sales_regbase[order(sales_regbase$Total.Sales, decreasing = T),]
#> Retailer Region Total.Sales
#> 28 West Gear West 137551280
#> 7 Foot Locker Northeast 65326474
#> 15 Sports Direct South 65258121
#> 25 Kohl's West 62302115
#> 19 Foot Locker Southeast 59669118
#> 20 Sports Direct Southeast 54178543
#> 2 Foot Locker Midwest 47987394
#> 24 Foot Locker West 37804709
#> 6 Amazon Northeast 36262590
#> 17 West Gear South 33087031
#> 16 Walmart South 33049858
#> 11 West Gear Northeast 32293733
#> 4 Sports Direct Midwest 26207191
#> 9 Sports Direct Northeast 24698097
#> 5 West Gear Midwest 22540586
#> 3 Kohl's Midwest 22229415
#> 21 Walmart Southeast 21005539
#> 22 West Gear Southeast 17491703
#> 1 Amazon Midwest 16835873
#> 8 Kohl's Northeast 14031168
#> 10 Walmart Northeast 13712005
#> 23 Amazon West 13365025
#> 26 Sports Direct West 12129045
#> 18 Amazon Southeast 10826333
#> 13 Foot Locker South 9307025
#> 27 Walmart West 6791008
#> 14 Kohl's South 3552055
#> 12 Amazon South 409091
#data untuk plot1
sales_region <-
sales_clean %>%
filter(Product %in% "Men's Street Footwear") %>%
group_by(Retailer, Region) %>%
summarise(Total.Sales = sum(Total.Sales)) %>%
arrange(-Total.Sales)
sales_region
#> # A tibble: 28 × 3
#> # Groups: Retailer [6]
#> Retailer Region Total.Sales
#> <fct> <fct> <int>
#> 1 West Gear West 27016671
#> 2 Foot Locker Northeast 18267693
#> 3 Foot Locker Midwest 15327737
#> 4 Foot Locker Southeast 13573667
#> 5 Kohl's West 12792583
#> 6 Sports Direct Southeast 11465945
#> 7 Sports Direct South 10959347
#> 8 Amazon Northeast 10562789
#> 9 West Gear Northeast 8587466
#> 10 Foot Locker West 8081554
#> # ℹ 18 more rows
#jumlah transaksi pada setiap retailer di masing2 region
trans_region <- aggregate(Total.Sales ~ Retailer + Region,
data = sales_clean,
FUN = length)
trans_region[order(trans_region$Total.Sales, decreasing = T),]
#> Retailer Region Total.Sales
#> 28 West Gear West 1207
#> 2 Foot Locker Midwest 840
#> 7 Foot Locker Northeast 835
#> 15 Sports Direct South 732
#> 25 Kohl's West 550
#> 6 Amazon Northeast 541
#> 9 Sports Direct Northeast 462
#> 20 Sports Direct Southeast 427
#> 19 Foot Locker Southeast 422
#> 17 West Gear South 408
#> 24 Foot Locker West 396
#> 5 West Gear Midwest 366
#> 16 Walmart South 366
#> 11 West Gear Northeast 302
#> 4 Sports Direct Midwest 286
#> 3 Kohl's Midwest 244
#> 8 Kohl's Northeast 170
#> 21 Walmart Southeast 150
#> 13 Foot Locker South 144
#> 1 Amazon Midwest 136
#> 18 Amazon Southeast 134
#> 23 Amazon West 126
#> 26 Sports Direct West 125
#> 22 West Gear Southeast 91
#> 10 Walmart Northeast 66
#> 14 Kohl's South 66
#> 27 Walmart West 44
#> 12 Amazon South 12
# Pembuatan Visual Statis
plot1 <-
ggplot(data = sales_region,
mapping = aes(x=Total.Sales, y=reorder(Retailer, Total.Sales))) +
geom_col(mapping = aes(fill=Region), position = "stack") +
labs(title = "Total Sales Based on Region",
x = "Total Sales",
y = "Retailer",
fill="Region") +
theme_classic()
# Mengubah Visual Statis menjadi Interaktif
ggplotly(plot1, tooltip = 'text')
#data untuk plot2
sales_unitsold <-
sales_clean %>%
filter(Product %in% "Men's Street Footwear") %>%
group_by(Retailer, Region) %>%
summarise(Units.Sold = sum(Units.Sold)) %>%
arrange(-Units.Sold)
sales_unitsold
#> # A tibble: 28 × 3
#> # Groups: Retailer [6]
#> Retailer Region Units.Sold
#> <fct> <fct> <int>
#> 1 West Gear West 73294
#> 2 Foot Locker Northeast 48459
#> 3 Sports Direct South 45756
#> 4 Foot Locker Midwest 43503
#> 5 Kohl's West 38004
#> 6 Foot Locker Southeast 34812
#> 7 Sports Direct Southeast 31110
#> 8 Amazon Northeast 26969
#> 9 Walmart South 23759
#> 10 West Gear South 23341
#> # ℹ 18 more rows
# Pembuatan Visual Statis
plot2 <-
ggplot(data = sales_unitsold,
mapping = aes(x=Units.Sold, y=reorder(Retailer, Units.Sold))) +
geom_col(mapping = aes(fill=Region), position = "stack") +
labs(title = "Total Unit Sold Based on Region",
x = "Total Unit Sold",
y = "Retailer",
fill="Region") +
theme_classic()
# Mengubah Visual Statis menjadi Interaktif
ggplotly(plot1, tooltip = 'text')